import mlflow
import pandas as pd

def generate_recommendations_table_with_best(
    experiment_ids, group_type="sim", sort_by=("MovieLens", "G/mean")
):
    """Build a per-approach comparison table from MLflow runs.

    Every run of every experiment in ``experiment_ids`` is inspected; only
    runs whose ``group_set`` param equals ``"test"`` and whose ``group_type``
    param equals ``group_type`` are kept. Each kept run contributes four
    metric columns under its dataset name. Rows are keyed by the pair
    (``recommender_strategy``, ``SAE_fusion_strategy``); when several runs
    share the same row key and dataset, the run seen last overwrites the
    earlier values.

    Args:
        experiment_ids: Iterable of MLflow experiment IDs to search.
        group_type: Value the run's ``group_type`` param must match.
        sort_by: ``(dataset, metric_label)`` column used to sort the rows in
            descending order. Defaults to ``("MovieLens", "G/mean")``.

    Returns:
        A ``pandas.DataFrame`` rounded to 3 decimals, with the columns
        ``index``, ``Approach``, ``Aggregation`` followed by the
        ``(dataset, metric_label)`` columns sorted by dataset then label.
        If no run matches, an empty frame with only the three leading
        columns is returned.

    Raises:
        KeyError: If matching runs exist but ``sort_by`` is not among the
            resulting columns.
    """
    records = {}

    for exp_id in experiment_ids:
        runs = mlflow.search_runs(
            experiment_ids=[exp_id],
            output_format="list",
        )
        for run in runs:
            params = run.data.params
            if (
                params.get("group_set") != "test"
                or params.get("group_type") != group_type
            ):
                continue

            run_metrics = run.data.metrics
            # Runs without a "dataset" param are grouped per experiment.
            dataset = params.get("dataset", f"Exp-{exp_id}")
            row_key = (
                params.get("recommender_strategy"),
                params.get("SAE_fusion_strategy"),
            )

            # NOTE(review): the "G/mean" label is filled from the
            # "CommonItemsNDCG20/median" metric -- confirm whether the label
            # or the metric name is the intended one.
            records.setdefault(row_key, {}).update(
                {
                    (dataset, "G/mean"): run_metrics.get("CommonItemsNDCG20/median"),
                    (dataset, "U/mean"): run_metrics.get("NDCG20/mean"),
                    (dataset, "U/min"): run_metrics.get("NDCG20/min"),
                    (dataset, "Pop"): run_metrics.get("Popularity/mean"),
                }
            )

    if not records:
        # With no rows the index is single-level, so assigning two index
        # names below would raise; return an empty table instead.
        return pd.DataFrame(columns=["index", "Approach", "Aggregation"])

    df = pd.DataFrame.from_dict(records, orient="index")
    df.index.names = ["Approach", "Aggregation"]

    if sort_by not in df.columns:
        raise KeyError(
            f"Sort column {sort_by!r} not found; "
            f"available columns: {list(df.columns)}"
        )

    # Order columns by dataset then metric label, rows by the sort column.
    df = (
        df.sort_index(axis=1, level=[0, 1])
        .sort_values(by=sort_by, ascending=False)
        .reset_index()
    )

    # NOTE(review): this second reset_index adds a redundant "index" column
    # that mirrors the row position. It is kept so the returned shape stays
    # the same for existing callers; drop it if nobody relies on it.
    return df.reset_index().round(3)

Comparing with other approaches

Group Recommendations Results for Similar groups sorted by MovieLens G/mean

# MLflow experiment IDs to query; the function searches each one separately.
experiment_ids = ['523100174176986081', '333391697323445885']

# Build the table from runs whose group_type param is "sim".
df = generate_recommendations_table_with_best(experiment_ids, group_type="sim")
# Bare expression: presumably renders the frame as the notebook cell output.
df
index Approach Aggregation LastFM1k MovieLens
G/mean Pop U/mean U/min G/mean Pop U/mean U/min
0 0 SAE wcom 0.743 0.660 0.817 0.620 0.634 0.538 0.702 0.564
1 1 SAE max 0.745 0.638 0.812 0.655 0.630 0.547 0.685 0.561
2 2 SAE topk 0.778 0.645 0.813 0.625 0.621 0.556 0.713 0.569
3 3 ELSA average 0.731 0.646 0.797 0.588 0.616 0.547 0.693 0.553
4 4 ADD None 0.738 0.653 0.814 0.639 0.612 0.542 0.705 0.568
5 5 SAE common_features 0.726 0.654 0.811 0.669 0.608 0.504 0.661 0.528
6 6 SAE average 0.744 0.651 0.822 0.660 0.590 0.546 0.703 0.571
7 7 SAE square_average 0.712 0.661 0.812 0.655 0.581 0.544 0.689 0.536
8 8 EPFuzzDA None 0.772 0.643 0.811 0.646 0.531 0.539 0.702 0.568
9 9 ELSA_INT average 0.712 0.552 0.741 0.497 0.510 0.473 0.626 0.469
10 10 LMS None 0.815 0.632 0.804 0.653 0.481 0.532 0.676 0.527
11 11 MPL None 0.562 0.586 0.749 0.558 0.421 0.475 0.605 0.487
12 12 GFAR None 0.433 0.521 0.670 0.495 0.157 0.396 0.517 0.368

Group Recommendations Results for Random groups sorted by MovieLens G/mean

# MLflow experiment IDs to query; the function searches each one separately.
experiment_ids = ['523100174176986081', '333391697323445885']

# Build the table from runs whose group_type param is "random".
df = generate_recommendations_table_with_best(experiment_ids, group_type="random")
# Bare expression: presumably renders the frame as the notebook cell output.
df
index Approach Aggregation LastFM1k MovieLens
G/mean Pop U/mean U/min G/mean Pop U/mean U/min
0 0 SAE common_features 0.567 0.663 0.729 0.526 0.757 0.528 0.641 0.491
1 1 SAE max 0.490 0.629 0.746 0.536 0.736 0.570 0.679 0.543
2 2 SAE topk 0.446 0.649 0.737 0.523 0.726 0.574 0.673 0.540
3 3 EPFuzzDA None 0.423 0.625 0.726 0.544 0.714 0.549 0.671 0.540
4 4 ADD None 0.430 0.651 0.739 0.515 0.684 0.565 0.678 0.544
5 5 SAE average 0.456 0.654 0.752 0.546 0.662 0.562 0.678 0.544
6 6 SAE wcom 0.493 0.663 0.741 0.527 0.613 0.546 0.668 0.536
7 7 ELSA average 0.482 0.648 0.698 0.455 0.601 0.565 0.670 0.513
8 8 LMS None 0.649 0.592 0.684 0.488 0.538 0.537 0.650 0.517
9 9 MPL None 0.202 0.521 0.622 0.456 0.457 0.469 0.576 0.435
10 10 GFAR None 0.072 0.454 0.554 0.372 0.358 0.396 0.460 0.312
11 11 SAE square_average 0.546 0.671 0.752 0.549 0.253 0.556 0.650 0.500
12 12 ELSA_INT average 0.320 0.546 0.651 0.402 0.069 0.477 0.596 0.418

Group Recommendations Results for Divergent groups sorted by MovieLens G/mean

# MLflow experiment IDs to query; the function searches each one separately.
experiment_ids = ['523100174176986081', '333391697323445885']

# Build the table from runs whose group_type param is "div".
df = generate_recommendations_table_with_best(experiment_ids, group_type="div")
# Bare expression: presumably renders the frame as the notebook cell output.
df
index Approach Aggregation LastFM1k MovieLens
G/mean Pop U/mean U/min G/mean Pop U/mean U/min
0 0 LMS None 0.267 0.485 0.559 0.326 0.377 0.315 0.487 0.355
1 1 EPFuzzDA None 0.231 0.545 0.633 0.429 0.374 0.376 0.583 0.435
2 2 ELSA average 0.000 0.554 0.602 0.255 0.303 0.278 0.525 0.257
3 3 ADD None 0.108 0.608 0.666 0.435 0.303 0.419 0.615 0.436
4 4 ELSA_INT average 0.000 0.277 0.425 0.071 0.079 0.086 0.336 0.033
5 5 SAE square_average 0.195 0.666 0.704 0.478 0.077 0.442 0.586 0.383
6 6 SAE common_features 0.073 0.672 0.696 0.460 0.077 0.294 0.438 0.230
7 7 SAE average 0.165 0.627 0.691 0.479 0.076 0.423 0.610 0.436
8 8 SAE max 0.167 0.617 0.693 0.479 0.073 0.402 0.592 0.438
9 9 SAE wcom 0.087 0.627 0.690 0.468 0.000 0.428 0.602 0.441
10 10 SAE topk 0.091 0.621 0.668 0.438 0.000 0.480 0.630 0.479
11 11 MPL None 0.000 0.386 0.456 0.290 0.000 0.240 0.422 0.237
12 12 GFAR None 0.000 0.337 0.442 0.294 0.000 0.238 0.373 0.231